; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -check-prefix=PWR9
; RUN: llc < %s -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -check-prefix=PWR8

; Module-local fp128 data shared by the loops below:
;   @a - single fp128 value, initialised to all-zero bits; loaded once as the
;        loop-invariant operand in fmul_ctrloop_fp128.
;   @x - 4-element fp128 array read by the fmul and fptrunc loops.
;   @y - 4-element fp128 array written by the fmul and fpext loops.
@a = internal global fp128 0xL00000000000000000000000000000000, align 16
@x = internal global [4 x fp128] zeroinitializer, align 16
@y = internal global [4 x fp128] zeroinitializer, align 16

; fmul_ctrloop_fp128: four-iteration loop computing y[i] = a * x[i] in fp128.
;
; Expected code generation, as pinned by the assertions below:
;   * Power9 - the multiply is a single xsmulqp and the loop is driven by the
;     count register (mtctr before the loop, bdnz as the back-edge).
;   * Power8 - the multiply is a call to __mulkf3; the loop keeps its trip
;     count in a GPR (addi -1 / cmpldi / bc) and no mtctr/bdnz is expected.
define void @fmul_ctrloop_fp128() nounwind {
; PWR9-LABEL: fmul_ctrloop_fp128:
; PWR9:       # %bb.0: # %entry
; PWR9-NEXT:    addis 3, 2, a@toc@ha
; PWR9-NEXT:    li 4, 4
; PWR9-NEXT:    addi 3, 3, a@toc@l
; PWR9-NEXT:    lxv 34, 0(3)
; PWR9-NEXT:    addis 3, 2, y@toc@ha
; PWR9-NEXT:    mtctr 4
; PWR9-NEXT:    addis 4, 2, x@toc@ha
; PWR9-NEXT:    addi 3, 3, y@toc@l
; PWR9-NEXT:    addi 4, 4, x@toc@l
; PWR9-NEXT:    addi 3, 3, -16
; PWR9-NEXT:    addi 4, 4, -16
; PWR9-NEXT:    .p2align 5
; PWR9-NEXT:  .LBB0_1: # %for.body
; PWR9-NEXT:    #
; PWR9-NEXT:    lxv 35, 16(4)
; PWR9-NEXT:    addi 4, 4, 16
; PWR9-NEXT:    xsmulqp 3, 2, 3
; PWR9-NEXT:    stxv 35, 16(3)
; PWR9-NEXT:    addi 3, 3, 16
; PWR9-NEXT:    bdnz .LBB0_1
; PWR9-NEXT:  # %bb.2: # %for.end
; PWR9-NEXT:    blr
;
; PWR8-LABEL: fmul_ctrloop_fp128:
; PWR8:       # %bb.0: # %entry
; PWR8-NEXT:    mflr 0
; PWR8-NEXT:    stdu 1, -112(1)
; PWR8-NEXT:    li 3, 48
; PWR8-NEXT:    std 0, 128(1)
; PWR8-NEXT:    addis 4, 2, x@toc@ha
; PWR8-NEXT:    std 28, 80(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 29, 88(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 30, 96(1) # 8-byte Folded Spill
; PWR8-NEXT:    li 30, 4
; PWR8-NEXT:    addi 4, 4, x@toc@l
; PWR8-NEXT:    li 29, 16
; PWR8-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PWR8-NEXT:    addis 3, 2, a@toc@ha
; PWR8-NEXT:    std 26, 64(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 27, 72(1) # 8-byte Folded Spill
; PWR8-NEXT:    addi 3, 3, a@toc@l
; PWR8-NEXT:    lxvd2x 0, 0, 3
; PWR8-NEXT:    addis 3, 2, y@toc@ha
; PWR8-NEXT:    addi 3, 3, y@toc@l
; PWR8-NEXT:    addi 28, 3, -16
; PWR8-NEXT:    addi 3, 4, -16
; PWR8-NEXT:    xxswapd 63, 0
; PWR8-NEXT:    .p2align 4
; PWR8-NEXT:  .LBB0_1: # %for.body
; PWR8-NEXT:    #
; PWR8-NEXT:    lxvd2x 0, 3, 29
; PWR8-NEXT:    vmr 2, 31
; PWR8-NEXT:    addi 27, 28, 16
; PWR8-NEXT:    addi 26, 3, 16
; PWR8-NEXT:    xxswapd 35, 0
; PWR8-NEXT:    bl __mulkf3
; PWR8-NEXT:    nop
; PWR8-NEXT:    xxswapd 0, 34
; PWR8-NEXT:    addi 30, 30, -1
; PWR8-NEXT:    mr 3, 26
; PWR8-NEXT:    cmpldi 30, 0
; PWR8-NEXT:    stxvd2x 0, 28, 29
; PWR8-NEXT:    mr 28, 27
; PWR8-NEXT:    bc 12, 1, .LBB0_1
; PWR8-NEXT:  # %bb.2: # %for.end
; PWR8-NEXT:    li 3, 48
; PWR8-NEXT:    ld 30, 96(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 29, 88(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 28, 80(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 27, 72(1) # 8-byte Folded Reload
; PWR8-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
; PWR8-NEXT:    ld 26, 64(1) # 8-byte Folded Reload
; PWR8-NEXT:    addi 1, 1, 112
; PWR8-NEXT:    ld 0, 16(1)
; PWR8-NEXT:    mtlr 0
; PWR8-NEXT:    blr
entry:
  ; Loop-invariant operand: loaded once, outside the loop.
  %0 = load fp128, ptr @a, align 16
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; %i.06 counts 0..3; the loop exits when the incremented value reaches 4.
  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [4 x fp128], ptr @x, i64 0, i64 %i.06
  %1 = load fp128, ptr %arrayidx, align 16
  ; The fp128 operation under test.
  %mul = fmul fp128 %0, %1
  %arrayidx1 = getelementptr inbounds [4 x fp128], ptr @y, i64 0, i64 %i.06
  store fp128 %mul, ptr %arrayidx1, align 16
  %inc = add nuw nsw i64 %i.06, 1
  %exitcond = icmp eq i64 %inc, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; fpext_ctrloop_fp128: four-iteration loop computing y[i] = fpext(a[i]),
; widening each double read through %a to fp128 and storing it into @y.
;
; Expected code generation, as pinned by the assertions below:
;   * Power9 - the conversion is xscvdpqp and the loop is driven by the count
;     register (mtctr before the loop, bdnz as the back-edge).
;   * Power8 - the conversion is a call to __extenddfkf2; the loop keeps its
;     trip count in a GPR (addi -1 / cmpldi / bc) and no mtctr/bdnz is expected.
define void @fpext_ctrloop_fp128(ptr %a) nounwind {
; PWR9-LABEL: fpext_ctrloop_fp128:
; PWR9:       # %bb.0: # %entry
; PWR9-NEXT:    li 4, 4
; PWR9-NEXT:    addi 3, 3, -8
; PWR9-NEXT:    mtctr 4
; PWR9-NEXT:    addis 4, 2, y@toc@ha
; PWR9-NEXT:    addi 4, 4, y@toc@l
; PWR9-NEXT:    addi 4, 4, -16
; PWR9-NEXT:    .p2align 5
; PWR9-NEXT:  .LBB1_1: # %for.body
; PWR9-NEXT:    #
; PWR9-NEXT:    lfdu 0, 8(3)
; PWR9-NEXT:    xscpsgndp 34, 0, 0
; PWR9-NEXT:    xscvdpqp 2, 2
; PWR9-NEXT:    stxv 34, 16(4)
; PWR9-NEXT:    addi 4, 4, 16
; PWR9-NEXT:    bdnz .LBB1_1
; PWR9-NEXT:  # %bb.2: # %for.end
; PWR9-NEXT:    blr
;
; PWR8-LABEL: fpext_ctrloop_fp128:
; PWR8:       # %bb.0: # %entry
; PWR8-NEXT:    mflr 0
; PWR8-NEXT:    std 28, -32(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 29, -24(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
; PWR8-NEXT:    stdu 1, -64(1)
; PWR8-NEXT:    addis 4, 2, y@toc@ha
; PWR8-NEXT:    addi 30, 3, -8
; PWR8-NEXT:    li 29, 4
; PWR8-NEXT:    std 0, 80(1)
; PWR8-NEXT:    addi 4, 4, y@toc@l
; PWR8-NEXT:    addi 28, 4, -16
; PWR8-NEXT:    .p2align 4
; PWR8-NEXT:  .LBB1_1: # %for.body
; PWR8-NEXT:    #
; PWR8-NEXT:    lfdu 1, 8(30)
; PWR8-NEXT:    addi 28, 28, 16
; PWR8-NEXT:    bl __extenddfkf2
; PWR8-NEXT:    nop
; PWR8-NEXT:    xxswapd 0, 34
; PWR8-NEXT:    addi 29, 29, -1
; PWR8-NEXT:    cmpldi 29, 0
; PWR8-NEXT:    stxvd2x 0, 0, 28
; PWR8-NEXT:    bc 12, 1, .LBB1_1
; PWR8-NEXT:  # %bb.2: # %for.end
; PWR8-NEXT:    addi 1, 1, 64
; PWR8-NEXT:    ld 0, 16(1)
; PWR8-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
; PWR8-NEXT:    mtlr 0
; PWR8-NEXT:    blr
entry:
  br label %for.body

for.body:
  ; %i.06 counts 0..3; the loop exits when the incremented value reaches 4.
  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %i.06
  %0 = load double, ptr %arrayidx, align 8
  ; The fp128 operation under test.
  %ext = fpext double %0 to fp128
  %arrayidx1 = getelementptr inbounds [4 x fp128], ptr @y, i64 0, i64 %i.06
  store fp128 %ext, ptr %arrayidx1, align 16
  %inc = add nuw nsw i64 %i.06, 1
  %exitcond = icmp eq i64 %inc, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; fptrunc_ctrloop_fp128: four-iteration loop computing a[i] = fptrunc(x[i]),
; narrowing each fp128 element of @x to double and storing it through %a.
;
; Expected code generation, as pinned by the assertions below:
;   * Power9 - the conversion is xscvqpdp and the loop is driven by the count
;     register (mtctr before the loop, bdnz as the back-edge).
;   * Power8 - the conversion is a call to __trunckfdf2; the loop keeps its
;     trip count in a GPR (addi -1 / cmpldi / bc) and no mtctr/bdnz is expected.
define void @fptrunc_ctrloop_fp128(ptr %a) nounwind {
; PWR9-LABEL: fptrunc_ctrloop_fp128:
; PWR9:       # %bb.0: # %entry
; PWR9-NEXT:    li 4, 4
; PWR9-NEXT:    addi 3, 3, -8
; PWR9-NEXT:    mtctr 4
; PWR9-NEXT:    addis 4, 2, x@toc@ha
; PWR9-NEXT:    addi 4, 4, x@toc@l
; PWR9-NEXT:    addi 4, 4, -16
; PWR9-NEXT:    .p2align 5
; PWR9-NEXT:  .LBB2_1: # %for.body
; PWR9-NEXT:    #
; PWR9-NEXT:    lxv 34, 16(4)
; PWR9-NEXT:    addi 4, 4, 16
; PWR9-NEXT:    xscvqpdp 2, 2
; PWR9-NEXT:    xscpsgndp 0, 34, 34
; PWR9-NEXT:    stfdu 0, 8(3)
; PWR9-NEXT:    bdnz .LBB2_1
; PWR9-NEXT:  # %bb.2: # %for.end
; PWR9-NEXT:    blr
;
; PWR8-LABEL: fptrunc_ctrloop_fp128:
; PWR8:       # %bb.0: # %entry
; PWR8-NEXT:    mflr 0
; PWR8-NEXT:    std 28, -32(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 29, -24(1) # 8-byte Folded Spill
; PWR8-NEXT:    std 30, -16(1) # 8-byte Folded Spill
; PWR8-NEXT:    stdu 1, -64(1)
; PWR8-NEXT:    addis 4, 2, x@toc@ha
; PWR8-NEXT:    addi 30, 3, -8
; PWR8-NEXT:    li 29, 4
; PWR8-NEXT:    std 0, 80(1)
; PWR8-NEXT:    addi 4, 4, x@toc@l
; PWR8-NEXT:    addi 28, 4, -16
; PWR8-NEXT:    .p2align 4
; PWR8-NEXT:  .LBB2_1: # %for.body
; PWR8-NEXT:    #
; PWR8-NEXT:    addi 28, 28, 16
; PWR8-NEXT:    lxvd2x 0, 0, 28
; PWR8-NEXT:    xxswapd 34, 0
; PWR8-NEXT:    bl __trunckfdf2
; PWR8-NEXT:    nop
; PWR8-NEXT:    addi 29, 29, -1
; PWR8-NEXT:    stfdu 1, 8(30)
; PWR8-NEXT:    cmpldi 29, 0
; PWR8-NEXT:    bc 12, 1, .LBB2_1
; PWR8-NEXT:  # %bb.2: # %for.end
; PWR8-NEXT:    addi 1, 1, 64
; PWR8-NEXT:    ld 0, 16(1)
; PWR8-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
; PWR8-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
; PWR8-NEXT:    mtlr 0
; PWR8-NEXT:    blr
entry:
  br label %for.body

for.body:
  ; %i.06 counts 0..3; the loop exits when the incremented value reaches 4.
  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds [4 x fp128], ptr @x, i64 0, i64 %i.06
  %0 = load fp128, ptr %arrayidx, align 16
  ; The fp128 operation under test.
  %trunc = fptrunc fp128 %0 to double
  %arrayidx1 = getelementptr inbounds double, ptr %a, i64 %i.06
  ; Elements are 8 bytes apart, so only 8-byte alignment can hold for every
  ; iteration; claiming more would be undefined behaviour.
  ; NOTE(review): the expected stfdu output should be unaffected by the
  ; alignment value - regenerate the assertions to confirm.
  store double %trunc, ptr %arrayidx1, align 8
  %inc = add nuw nsw i64 %i.06, 1
  %exitcond = icmp eq i64 %inc, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; External variadic declaration. None of the functions visible above call it.
; NOTE(review): attribute group #2 is not defined in the visible part of this
; file - confirm it is defined elsewhere, or drop the reference (and possibly
; this declaration) if it is a leftover.
declare void @obfuscate(ptr, ...) local_unnamed_addr #2
